# BUSCO plots all kmer sets
# setwd("~/Desktop/R/ramon/TransPi/paper/")
library(reshape2)
library(plotly)
library(dplyr)
# Read busco3_75.csv into `csv`; the file's first row supplies the column names.
csv <- read.csv(file = "busco3_75.csv", header = TRUE)
Complete
## Complete genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Kruskal-Wallis test was significant (p<.05)"
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: comp$Score and comp$Program
##
##         Transpi
## Trinity 0.013
##
## P value adjustment method: BH
Single
## Single genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Kruskal-Wallis test was significant (p<.05)"
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: sing$Score and sing$Program
##
##         Transpi
## Trinity 3.2e-16
##
## P value adjustment method: BH
Duplicated
## Duplicated genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Kruskal-Wallis test was significant (p<.05)"
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: dup$Score and dup$Program
##
##         Transpi
## Trinity 4.3e-08
##
## P value adjustment method: BH
Fragmented
## Fragmented genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Missing
## Missing genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Kruskal-Wallis test was significant (p<.05)"
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: mis$Score and mis$Program
##
##         Transpi
## Trinity 0.0041
##
## P value adjustment method: BH
Only TransPi
Complete
## Complete genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Single
## Single genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## singTra$Kmer 2 2.0 0.985 0.083 0.921
## Residuals 48 572.1 11.920
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = singTra$Score ~ singTra$Kmer, data = singTra2)
##
## $`singTra$Kmer`
## diff lwr upr p adj
## KmerB-KmerA 0.3411765 -2.522761 3.205114 0.9553164
## KmerC-KmerA 0.4647059 -2.399232 3.328644 0.9187644
## KmerC-KmerB 0.1235294 -2.740409 2.987467 0.9940194
Duplicated
## Duplicated genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## dupTra$Kmer 2 2.7 1.356 0.047 0.954
## Residuals 48 1377.0 28.687
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = dupTra$Score ~ dupTra$Kmer, data = dupTra2)
##
## $`dupTra$Kmer`
## diff lwr upr p adj
## KmerB-KmerA -0.2941176 -4.737159 4.148924 0.9859730
## KmerC-KmerA -0.5647059 -5.007748 3.878336 0.9493041
## KmerC-KmerB -0.2705882 -4.713630 4.172453 0.9881141
Fragmented
## Fragmented genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## fragTra$Kmer 2 0.184 0.092 0.191 0.827
## Residuals 48 23.136 0.482
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = fragTra$Score ~ fragTra$Kmer, data = fragTra2)
##
## $`fragTra$Kmer`
## diff lwr upr p adj
## KmerB-KmerA -0.07647059 -0.6523908 0.4994497 0.9448074
## KmerC-KmerA 0.07058824 -0.5053320 0.6465085 0.9527663
## KmerC-KmerB 0.14705882 -0.4288614 0.7229791 0.8112562
Missing
## Missing genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Complete
## Complete genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## Kmer 2 0.021 0.0103 0.011 0.989
## Residuals 27 24.953 0.9242
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Score ~ Kmer, data = ceTra2)
##
## $Kmer
## diff lwr upr p adj
## KmerB-KmerA 0.06 -1.005968 1.125968 0.9893241
## KmerC-KmerA 0.01 -1.055968 1.075968 0.9997018
## KmerC-KmerB -0.05 -1.115968 1.015968 0.9925730
Single
## Single genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## Kmer 2 0.2 0.117 0.008 0.992
## Residuals 27 381.4 14.124
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Score ~ Kmer, data = ceTra2)
##
## $Kmer
## diff lwr upr p adj
## KmerB-KmerA 0.21 -3.95722 4.37722 0.9914329
## KmerC-KmerA 0.15 -4.01722 4.31722 0.9956190
## KmerC-KmerB -0.06 -4.22722 4.10722 0.9992976
Duplicated
## Duplicated genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## Kmer 2 0.1 0.07 0.003 0.997
## Residuals 27 584.7 21.66
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Score ~ Kmer, data = ceTra2)
##
## $Kmer
## diff lwr upr p adj
## KmerB-KmerA -0.15 -5.310022 5.010022 0.9971403
## KmerC-KmerA -0.14 -5.300022 5.020022 0.9975084
## KmerC-KmerB 0.01 -5.150022 5.170022 0.9999873
Fragmented
## Fragmented genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Missing
## Missing genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## Kmer 2 0.008 0.0040 0.005 0.995
## Residuals 27 20.932 0.7753
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Score ~ Kmer, data = ceTra2)
##
## $Kmer
## diff lwr upr p adj
## KmerB-KmerA -0.02 -0.996311 0.956311 0.9985788
## KmerC-KmerA -0.04 -1.016311 0.936311 0.9943282
## KmerC-KmerB -0.02 -0.996311 0.956311 0.9985788
BUSCO and reads
## comp.Program comp.Category comp.Score comp.Reads comp.Sample
## 1 Transpi Complete 86.0 40,302,838 CE1
## 2 Transpi Complete 86.2 50,516,835 CE2
## 3 Transpi Complete 83.9 41,947,175 CE3
## 4 Transpi Complete 83.8 44,969,393 CE4
## 5 Transpi Complete 84.1 45,605,396 CE5
## 6 Transpi Complete 86.2 40,302,838 CE1
Complete
## Complete genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## Kmer 2 0.134 0.0669 0.148 0.863
## Residuals 33 14.923 0.4522
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Score ~ Kmer, data = dmTra2)
##
## $Kmer
## diff lwr upr p adj
## KmerB-KmerA 0.008333333 -0.6653044 0.6819710 0.9994921
## KmerC-KmerA -0.125000000 -0.7986377 0.5486377 0.8923866
## KmerC-KmerB -0.133333333 -0.8069710 0.5403044 0.8785582
Single
## Single genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Duplicated
## Duplicated genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Fragmented
## Fragmented genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Missing
## Missing genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## Kmer 2 0.032 0.01583 0.055 0.947
## Residuals 33 9.498 0.28783
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Score ~ Kmer, data = dmTra2)
##
## $Kmer
## diff lwr upr p adj
## KmerB-KmerA 0.066666667 -0.4707725 0.6041058 0.9502882
## KmerC-KmerA 0.058333333 -0.4791058 0.5957725 0.9616976
## KmerC-KmerB -0.008333333 -0.5457725 0.5291058 0.9992022
BUSCO and reads
## comp.Program comp.Category comp.Score comp.Reads comp.Sample
## 1 Transpi Complete 97.1 87,423,452 DM1
## 2 Transpi Complete 97.6 85,714,154 DM2
## 3 Transpi Complete 96.6 88,252,694 DM3
## 4 Transpi Complete 97.4 82,110,608 DM4
## 5 Transpi Complete 96.8 102,413,880 DM5
## 6 Transpi Complete 97.6 98,529,578 DM6
Complete
## Complete genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Single
## Single genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Duplicated
## Duplicated genes comparison
## [1] "One (or more) set is not normally distributed"
## [1] "Data not significant. Skipping pairwise comparison"
Fragmented
## Fragmented genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## Kmer 2 0.01 0.0036 0.003 0.997
## Residuals 33 47.05 1.4257
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Score ~ Kmer, data = mmTra2)
##
## $Kmer
## diff lwr upr p adj
## KmerB-KmerA 0.025000000 -1.171128 1.221128 0.9985510
## KmerC-KmerA 0.033333333 -1.162795 1.229462 0.9974255
## KmerC-KmerB 0.008333333 -1.187795 1.204462 0.9998389
Missing
## Missing genes comparison
## [1] "All sets are normally distributed"
## [1] "ANOVA"
## Df Sum Sq Mean Sq F value Pr(>F)
## Kmer 2 0.004 0.0019 0.002 0.998
## Residuals 33 30.286 0.9178
## [1] "Pairwise comparison"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Score ~ Kmer, data = mmTra2)
##
## $Kmer
## diff lwr upr p adj
## KmerB-KmerA 0.008333333 -0.9513443 0.9680109 0.9997497
## KmerC-KmerA -0.016666667 -0.9763443 0.9430109 0.9989993
## KmerC-KmerB -0.025000000 -0.9846776 0.9346776 0.9977499
BUSCO and reads
## comp.Program comp.Category comp.Score comp.Reads comp.Sample
## 1 Transpi Complete 98.0 33,700,156 MM1
## 2 Transpi Complete 98.0 41,236,457 MM2
## 3 Transpi Complete 98.0 35,598,598 MM3
## 4 Transpi Complete 93.9 41,745,958 MM4
## 5 Transpi Complete 96.1 45,329,544 MM5
## 6 Transpi Complete 94.5 44,469,310 MM6